* Load the source data set and switch off output paging
use inequality_data_70s_correct, clear
set more off

* Keep reference years 1961 through 1984. The quarter-level (qyear) restrictions
* that define the estimation sample are applied later, in the qreg if-conditions.
* 1979 is excluded because we do not have all observations for that year.
keep if inrange(ref_year, 1961, 1984) & ref_year!=1979


* Household weight for the percentile calculations (recreates the BLS weights):
* wgt20 divided by family size
gen finlwt = wgt20/fam_size

* Total non-housing expenditures for the quantile regressions.
* The total and owned-dwelling source variables differ by reference year.
gen double totexp_nh = .
replace totexp_nh = totexp2 - totowndwe if ref_year==1961
replace totexp_nh = totexp - d_owndwell if inlist(ref_year, 1972, 1973)
replace totexp_nh = totexp - (owndwepq + owndwecq) if ref_year>=1980

* cpi_u_rs_adj_80 converts all data to 1980 dollars.
* The two lists below are paired by position: k-th year gets the k-th factor.
gen cpi_u_rs_adj_80 = .
local cpi_years 1961 1972 1973 1980 1981 1982 1983 1984
local cpi_adj   2.0898 1.7188 1.6345 1 0.9204 0.8746 0.8453 0.8177
forvalues k = 1/8 {
    local yr  : word `k' of `cpi_years'
    local adj : word `k' of `cpi_adj'
    replace cpi_u_rs_adj_80 = `adj' if ref_year==`yr'
}

* Real (1980 dollar) versions of the monetary variables, prefixed r_
foreach v of varlist totpval totexp_nh renteq houseval {
    gen r_`v' = cpi_u_rs_adj_80*`v'
}

* Logs of the real variables. The log is taken only for positive values;
* every other observation keeps the value 0.
* r_ln_totpval uses r_totpval for 1980 onward and r_houseval for 1972 and 1973.
gen double r_ln_totpval = 0
replace r_ln_totpval = log(r_totpval) if r_totpval>0 & r_totpval!=. & ref_year>=1980
replace r_ln_totpval = log(r_houseval) if r_houseval>0 & r_houseval!=. & inlist(ref_year, 1972, 1973)
foreach v in totexp_nh renteq {
    gen double r_ln_`v' = cond(r_`v'>0 & r_`v'!=., log(r_`v'), 0)
}

* DEMOGRAPHIC VARIABLES
* Family type dummies, each 1 when the condition holds and 0 otherwise
* swnk: single woman, no kids
gen swnk = (sex_ref==2 & fam_type==8)
* sm: single mother
gen sm = (sex_ref==2 & fam_type==7)
* mm: married couple with children (fam_type 2 to 4 and at least one person under 18)
gen mm = (inrange(fam_type, 2, 4) & perslt18>0 & perslt18!=.)

* Consistent location variable across survey years
* smsa2 = 1 if in smsa; = 2 if urban but not in smsa; = 3 if rural not in smsa
local y80s inrange(ref_year, 1980, 1984)
local y70s inlist(ref_year, 1972, 1973)
gen smsa2 = .
* 1980 to 1984: built from smsastat and bls_urbn
replace smsa2 = 1 if `y80s' & smsastat==1
replace smsa2 = 2 if `y80s' & smsastat==2 & bls_urbn==1
replace smsa2 = 3 if `y80s' & smsastat==2 & bls_urbn==2
* 1961: built from the 1961 locat codes
replace smsa2 = 1 if ref_year==1961 & inlist(locat, 1, 2, 3, 4, 6, 7)
replace smsa2 = 2 if ref_year==1961 & locat==5
replace smsa2 = 3 if ref_year==1961 & inlist(locat, 8, 9)
* 1972 and 1973: locat uses a different coding than in 1961
replace smsa2 = 1 if `y70s' & inlist(locat, 1, 2, 3, 4, 5, 6)
replace smsa2 = 2 if `y70s' & locat==7
replace smsa2 = 3 if `y70s' & locat==8

* Education category ed_type (1 to 4) and dummies ed1-ed4.
* ed_type already exists for the earlier years, so the existing variable is
* renamed to old_ed_type before the harmonized one is built.
rename ed_type old_ed_type
gen ed_type = .
* Years before 1970 carry over the existing coding
replace ed_type = old_ed_type if ref_year<1970
* 1972 and 1973 mapping from educ_ref
local w70s inrange(ref_year, 1972, 1973)
replace ed_type = 1 if `w70s' & inlist(educ_ref, 1, 2, 6)
replace ed_type = 2 if `w70s' & educ_ref==3
replace ed_type = 3 if `w70s' & educ_ref==4
replace ed_type = 4 if `w70s' & educ_ref==5
* Interviews with qyear 801 through 955 use a different educ_ref mapping
local w80s inrange(qyear, 801, 955)
replace ed_type = 1 if `w80s' & inlist(educ_ref, 1, 2, 7)
replace ed_type = 2 if `w80s' & educ_ref==3
replace ed_type = 3 if `w80s' & educ_ref==4
replace ed_type = 4 if `w80s' & inlist(educ_ref, 5, 6)
* One 0/1 dummy per category; all four are 0 when ed_type is missing
forvalues k = 1/4 {
    gen ed`k' = (ed_type==`k')
}

*Gen age indicators
* age_grp: 1 = 0-34, 2 = 35-49, 3 = 50-64, 4 = 65 and over (based on age_ref)
gen age_grp=.
replace age_grp=1 if (age_ref>=0 & age_ref<=34)
replace age_grp=2 if (age_ref>=35 & age_ref<=49)
replace age_grp=3 if (age_ref>=50 & age_ref<=64)
* Stata treats missing values as larger than any number, so an unguarded
* age_ref>=65 would also put observations with missing age_ref in the 65+ group.
* The age_ref<. guard leaves age_grp missing for them instead.
replace age_grp=4 if (age_ref>=65 & age_ref<.)
* Dummies age1-age4: 1 for the matching group, 0 otherwise.
* Observations with missing age_grp get 0 in all four dummies.
forvalues x=1/4 {
gen age`x'=0
replace age`x'=1 if age_grp==`x'
}

*Drop observations for which we have incomplete data for houseval
* 1961: houseval code 99
drop if (ref_year==1961 & houseval==99) 
* 1972 and 1973: houseval code 88888 among households with cutenure 1, 2 or 3
drop if ((ref_year==1972 | ref_year==1973) & houseval==88888 & (cutenure==1 | cutenure==2 | cutenure==3))

* Save the prepared data; it is reloaded further down for the 1961/1972/1973 predictions
save data_for_renteq_pred , replace 

* Close any log left open by an earlier aborted run. Without this, log using
* stops with an error when a log is already open. capture suppresses the error
* that log close raises when no log is open.
capture log close
log using home_pred_qreg_new , replace 

* 1980-84
* Quantile regressions of log real rental equivalence, one per percentile 1 to 99.
* Each regression is estimated on the sample in est80 and then used to predict
* for every observation kept here.
keep if inrange(ref_year, 1980, 1984)
xi i.rooms i.smsa2

* Regressors and estimation-sample condition shared by all 99 regressions
local rhs80 r_ln_totpval r_ln_totexp_nh fam_size sm mm swnk ed2-ed4 age2-age4 cen_ac win_ac _Irooms_2-_Irooms_25 _Ismsa2_2 _Ismsa2_3
local est80 qyear>=842 & qyear<=845 & n_hous==3 & (cutenure==1 | cutenure==2 | cutenure==3)

* Year-specific multipliers applied to the exponentiated prediction (paired by position).
* NOTE(review): these look like factors that move the 1980-dollar prediction back
* into the dollars of each year - confirm against the price index source.
local adj_years 1981 1982 1983 1984
local adj_facts 1.0868 1.144 1.1839 1.223

forvalues q = 1/99 {
    qreg r_ln_renteq `rhs80' if (`est80'), quantile(`q')
    * Linear prediction in logs, then exponentiate to levels
    predict double yhat_q`q'
    gen double pred_home_q`q' = exp(yhat_q`q')
    * 1980 observations are left unscaled
    forvalues j = 1/4 {
        local yr  : word `j' of `adj_years'
        local fac : word `j' of `adj_facts'
        replace pred_home_q`q' = pred_home_q`q'*`fac' if ref_year==`yr'
    }
}
* Temporary files, one per reference year. The 61, 72 and 73 files are declared
* here as well and are filled later in this do-file.
* NOTE(review): no file is written for 1982 even though 1982 observations are
* still in memory - confirm that leaving 1982 out is intended.
tempfile temp61 temp72 temp73 temp80 temp81 temp83 temp84
* Write each year to its own file; preserve and restore bring the full data back
foreach yy in 80 81 83 84 {
    preserve
    keep if ref_year==19`yy'
    save "`temp`yy''" , replace
    restore
}
* Collect data by year, starting with the 1st quantile.
* keep80 lists the variables carried into every stacked copy.
local keep80 ref_year finlwt fincbtax newid age_ref qyear sex_ref fam_type perslt18 totexp_nh r_totexp_nh r_ln_totexp_nh totpval r_totpval r_ln_totpval renteq r_renteq r_ln_renteq swnk sm mm n_hous cutenure
foreach y in 80 81 83 84 {
    use `keep80' pred_home_q1 using "`temp`y''" , clear
    * Stack the same year once more for each of quantiles 2 to 99, so each copy
    * of an observation carries exactly one of the 99 predictions
    forvalues q = 2/99 {
        append using "`temp`y''" , keep(`keep80' pred_home_q`q')
    }
    * Check for missing predictions: nmiss counts the non-missing prediction
    * columns in each row and the tabulation shows the distribution
    egen nmiss = rownonmiss(pred_home_q1-pred_home_q99)
    tab nmiss
    * Collapse the 99 columns into a single predicted housing variable
    gen double pred_home = .
    forvalues q = 1/99 {
        replace pred_home = pred_home_q`q' if pred_home_q`q'!=.
    }
    * Weighted deciles of actual rent, stored in scalars p10 to p90
    _pctile renteq [pw=finlwt], nquantiles(10)
    forvalues d = 1/9 {
        scalar p`d'0 = r(r`d')
    }

    * Weighted deciles of predicted rent, stored in scalars pp10 to pp90
    _pctile pred_home [pw=finlwt], nquantiles(10)
    forvalues d = 1/9 {
        scalar pp`d'0 = r(r`d')
    }

    display "Actual Rent 19`y'"
    forvalues d = 1/9 {
        display p`d'0
    }

    display "Predicted Rent 19`y'"
    forvalues d = 1/9 {
        display pp`d'0
    }

    * Keep only the combined prediction and save the stacked file for this year
    drop nmiss pred_home_q*
    save home_pred_stacked_`y' , replace
}

* Fit of predictions in the estimation sample:
* reload the stacked 1984 file and keep only observations that meet the
* condition used in the 1980s quantile regressions
use home_pred_stacked_84 , clear
keep if inrange(qyear, 842, 845) & n_hous==3 & inlist(cutenure, 1, 2, 3)

* Weighted deciles of actual rent, stored in scalars p10 to p90
_pctile renteq [pw=finlwt], nquantiles(10)
forvalues d = 1/9 {
    scalar p`d'0 = r(r`d')
}

* Weighted deciles of predicted rent, stored in scalars pp10 to pp90
_pctile pred_home [pw=finlwt], nquantiles(10)
forvalues d = 1/9 {
    scalar pp`d'0 = r(r`d')
}

* Actual Rent 1984, estimation sample only
forvalues d = 1/9 {
    display p`d'0
}

* Predicted Rent 1984, estimation sample only
forvalues d = 1/9 {
    display pp`d'0
}

* Combine the stacked yearly files for 1980, 1981, 1983 and 1984 into one file,
* ordered by reference year
use home_pred_stacked_80 , clear
append using home_pred_stacked_81 home_pred_stacked_83 home_pred_stacked_84
sort ref_year
save renteq_pred_stacked_80s , replace

* 1961, 1972 and 1973
* Reload the prepared data and keep the three earlier reference years
use data_for_renteq_pred , clear
keep if inlist(ref_year, 1961, 1972, 1973)

* houseval_ind: bracket code for house value.
* 1961 copies houseval as it is; 1972 and 1973 are assigned a code from the
* year-specific cutoffs below.
gen houseval_ind = .
replace houseval_ind = houseval if ref_year==1961

* Bracket codes, lowest to highest, and the ten cutoffs that separate them.
* Bracket k covers values from cutoff k-1 (inclusive) up to cutoff k (exclusive).
local hv_codes 10 20 31 32 41 42 51 52 60 71 72
local hv_cuts72 6278.81 12557.62 15697.02 18836.42 21975.83 25115.23 28254.64 31394.04 43951.66 62788.08
local hv_cuts73 6621.22 13242.44 16553.05 19863.66 23174.27 26484.88 29795.49 33106.11 46348.55 66212.21

foreach yy in 72 73 {
    * Lowest bracket: everything below the first cutoff
    local hi : word 1 of `hv_cuts`yy''
    replace houseval_ind = 10 if ref_year==19`yy' & houseval<`hi'
    * Interior brackets
    forvalues k = 2/10 {
        local j = `k' - 1
        local code : word `k' of `hv_codes'
        local lo : word `j' of `hv_cuts`yy''
        local hi : word `k' of `hv_cuts`yy''
        replace houseval_ind = `code' if ref_year==19`yy' & houseval>=`lo' & houseval<`hi'
    }
    * Top bracket: at or above the last cutoff, excluding missing house values
    local lo : word 10 of `hv_cuts`yy''
    replace houseval_ind = 72 if ref_year==19`yy' & houseval>=`lo' & houseval!=.
}

* Consistent four-category location (SMSA) code locat2, built from locat.
* The locat codes are grouped differently in 1961 than in 1972 and 1973.
gen locat2 = .
* 1961
replace locat2 = 1 if ref_year==1961 & locat==1
replace locat2 = 2 if ref_year==1961 & locat==2
replace locat2 = 3 if ref_year==1961 & inlist(locat, 3, 4, 5)
replace locat2 = 4 if ref_year==1961 & inlist(locat, 6, 7, 8, 9)

* 1972 and 1973
local y70s inlist(ref_year, 1972, 1973)
replace locat2 = 1 if `y70s' & inlist(locat, 1, 3, 5)
replace locat2 = 2 if `y70s' & inlist(locat, 2, 4, 6)
replace locat2 = 3 if `y70s' & locat==7
replace locat2 = 4 if `y70s' & locat==8

* Indicator variables for house value bracket, region and location
xi i.houseval_ind i.region i.locat2

* Quantile regressions, one per percentile 1 to 99, estimated on the 1972 and
* 1973 observations that satisfy est70 and used to predict for all three years
local rhs70 r_ln_totexp_nh _Ihouseval__20-_Ihouseval__72 fam_size sm mm swnk ed2-ed4 age2-age4 _Iregion_* _Ilocat2_*
local est70 (cutenure==1 | cutenure==2 | cutenure==3) & r_ln_renteq>0 & houseval_ind!=99 & (ref_year==1972 | ref_year==1973)

* Year-specific multipliers applied to the exponentiated prediction (paired by position).
* NOTE(review): presumably these convert the prediction into the dollars of
* each year - confirm against the price index source.
local adj_years 1961 1972 1973
local adj_facts .4568 .5736 .6049

forvalues q = 1/99 {
    qreg r_ln_renteq `rhs70' if (`est70'), quantile(`q')
    * Linear prediction in logs, then exponentiate to levels
    predict double yhat_q`q'
    gen double pred_home_q`q' = exp(yhat_q`q')
    forvalues j = 1/3 {
        local yr  : word `j' of `adj_years'
        local fac : word `j' of `adj_facts'
        replace pred_home_q`q' = pred_home_q`q'*`fac' if ref_year==`yr'
    }
}

* Write each of 1961, 1972 and 1973 to its own temporary file (the temp61,
* temp72 and temp73 names were declared earlier with tempfile);
* preserve and restore bring the full data back after each save
foreach yy in 61 72 73 {
    preserve
    keep if ref_year==19`yy'
    save "`temp`yy''" , replace
    restore
}

* Collect data by year, starting with the 1st quantile.
* keep70 lists the variables carried into every stacked copy.
local keep70 ref_year finlwt fincbtax newid age_ref qyear sex_ref fam_type perslt18 totexp_nh r_totexp_nh r_ln_totexp_nh totpval r_totpval r_ln_totpval renteq r_renteq r_ln_renteq swnk sm mm n_hous cutenure houseval_ind
foreach y in 61 72 73 {
    use `keep70' pred_home_q1 using "`temp`y''" , clear
    * Stack the same year once more for each of quantiles 2 to 99, so each copy
    * of an observation carries exactly one of the 99 predictions
    forvalues q = 2/99 {
        append using "`temp`y''" , keep(`keep70' pred_home_q`q')
    }
    * Check for missing predictions: nmiss counts the non-missing prediction
    * columns in each row and the tabulation shows the distribution
    egen nmiss = rownonmiss(pred_home_q1-pred_home_q99)
    tab nmiss
    * Collapse the 99 columns into a single predicted housing variable
    gen double pred_home = .
    forvalues q = 1/99 {
        replace pred_home = pred_home_q`q' if pred_home_q`q'!=.
    }
    * Weighted deciles of actual rent, stored in scalars p10 to p90
    _pctile renteq [pw=finlwt], nquantiles(10)
    forvalues d = 1/9 {
        scalar p`d'0 = r(r`d')
    }

    * Weighted deciles of predicted rent, stored in scalars pp10 to pp90
    _pctile pred_home [pw=finlwt], nquantiles(10)
    forvalues d = 1/9 {
        scalar pp`d'0 = r(r`d')
    }

    display "Actual Rent 19`y'"
    forvalues d = 1/9 {
        display p`d'0
    }

    display "Predicted Rent 19`y'"
    forvalues d = 1/9 {
        display pp`d'0
    }

    * Keep only the combined prediction and save the stacked file for this year
    drop nmiss pred_home_q*
    save home_pred_stacked_60_`y' , replace
}
* Check the fit in the estimation sample, then build the combined file.
* Start from the stacked 1973 and 1972 files.
use home_pred_stacked_60_73 , clear
append using home_pred_stacked_60_72
* The fit check runs on a temporary subset; restore brings the full data back
preserve
keep if (inlist(cutenure, 1, 2, 3) & r_ln_renteq>0 & houseval_ind!=99 & inlist(ref_year, 1972, 1973))

* Weighted deciles of actual rent, stored in scalars p10 to p90
_pctile renteq [pw=finlwt], nquantiles(10)
forvalues d = 1/9 {
    scalar p`d'0 = r(r`d')
}

* Weighted deciles of predicted rent, stored in scalars pp10 to pp90
_pctile pred_home [pw=finlwt], nquantiles(10)
forvalues d = 1/9 {
    scalar pp`d'0 = r(r`d')
}

* Actual Rent 1972/73, estimation sample only
forvalues d = 1/9 {
    display p`d'0
}

* Predicted Rent 1972/73, estimation sample only
forvalues d = 1/9 {
    display pp`d'0
}

restore

* Add the stacked 1961 file, order by reference year, save, and close the log
append using home_pred_stacked_60_61
sort ref_year
save renteq_pred_stacked_60s , replace
log close
